package com.mycompany.sentimentanalysis;


import org.datavec.api.util.ClassPathResource;
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors;
import org.deeplearning4j.text.documentiterator.LabelsSource;
import org.deeplearning4j.text.sentenceiterator.BasicLineIterator;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;


import java.io.File;
import static java.lang.System.out;

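/**
 * Trains a ParagraphVectors model on {@code raw_sentences.txt} and prints the
 * similarity between selected pairs of lines from that corpus.
 *
 * @author [email protected]
 * adapted by Jennifer Reese
 */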
public class ClassifyBySimilarity {

    /** Classpath location of the training corpus, read through a {@link BasicLineIterator}. */
    private static final String CORPUS_RESOURCE = "/raw_sentences.txt";

    /** Template given to {@link LabelsSource}; every label queried below is this prefix plus an index. */
    private static final String LABEL_PREFIX = "LINE_";

    /**
     * Trains a ParagraphVectors model on the corpus and prints the similarity of
     * five hand-picked line pairs: three pairs whose sentences differ only in the
     * last word, followed by two pairs of unrelated sentences.
     *
     * @param args ignored
     * @throws Exception if the corpus cannot be located or opened, or if training fails
     */
    public static void main(String[] args) throws Exception {
        File corpus = new ClassPathResource(CORPUS_RESOURCE).getFile();
        SentenceIterator sentences = new BasicLineIterator(corpus);

        ParagraphVectors vec;
        try {
            TokenizerFactory tokenizers = new DefaultTokenizerFactory();
            tokenizers.setTokenPreProcessor(new CommonPreprocessor());

            vec = new ParagraphVectors.Builder()
                    .minWordFrequency(1)
                    .iterations(5)
                    .epochs(1)
                    .layerSize(100)
                    .learningRate(0.025)
                    .labelsSource(new LabelsSource(LABEL_PREFIX))
                    .windowSize(5)
                    .iterate(sentences)
                    .trainWordVectors(false)
                    .tokenizerFactory(tokenizers)
                    .sampling(0)
                    .build();

            vec.fit();
        } finally {
            // The iterator is only needed while fitting; release its file reader
            // whether training succeeded or threw.
            sentences.finish();
        }

        // Pairs that share everything but the final word.
        printSimilarity(vec, 9835, 12492, "'This is my house .'/'This is my world .'");
        printSimilarity(vec, 3720, 16392, "'This is my way .'/'This is my work .'");
        printSimilarity(vec, 6347, 3720, "'This is my case .'/'This is my way .'");

        // Pairs with unrelated sentences, for contrast.
        printSimilarity(vec, 3720, 9852, "'This is my way .'/'We now have one .'");
        printSimilarity(vec, 3720, 3719, "'This is my way .'/'At first he says no .'");
    }

    /**
     * Looks up the similarity of two labelled lines and prints it.
     *
     * @param model     trained model to query
     * @param first     index used in the first label ({@code LINE_<first>})
     * @param second    index used in the second label ({@code LINE_<second>})
     * @param sentences human-readable text of the two lines, echoed in the output
     */
    private static void printSimilarity(ParagraphVectors model, int first, int second, String sentences) {
        double similarity = model.similarity(LABEL_PREFIX + first, LABEL_PREFIX + second);
        // The message reports 1-based line numbers, i.e. label index + 1.
        out.println("Comparing lines " + (first + 1) + " & " + (second + 1)
                + " (" + sentences + ") Similarity = " + similarity);
    }
}